# Replication File for Seljan and Gronke, 2022, "Happy Birthday You Get
# To Vote".rm() 
#
# ANALYSIS: Oregon AVR analysis, OR 2016 election

sink("PSRM AVR OR 2016 Replication_log.txt", append=F, type="output")

#
library(dplyr)
library(lubridate)
library(mosaic)
library(stringr)
library(AER)
library(tidyverse)
library(tidymodels)
library(stargazer)
library(doBy)
library(margins)
library(radiant)
library(ivpack)     #
                    # Package archived on CRAN 6/20/2022
                    # To install
                    # library(devtools)
                    # install_version("ivpack", version = "1.2", repos = "http://cran.us.r-project.org")
library(equivtest)  # install_github("ekhartman/equivtest")
library(sjPlot)
library(sandwich)
library(splitstackshape)


options(scipen = 100, digits = 3)


OR_2016<-readRDS("Oregon/OR2016_anon.RDS")

#Create objects for notable dates/VEP
electionday2016<-yday(ymd(161108))
deadlineNov2016<-yday(ymd(161018))
cutoffNov2016<-yday(ymd(160927))
bdaysept272016<-yday(ymd(160927))
primary26<-yday(ymd(160517))
deadlineMay2016<-yday(ymd(160426))
bdayApril5<-yday(ymd(160405))
VEP=3024174
OR_2016$electionday2016<-electionday2016


#Compute dichotomous voting record. Inactive voters coded as "0".
OR_2016$voteyes_2016<-ifelse(OR_2016$X11.08.2016=="YES",1,0)
OR_2016$voteyes_2014<-ifelse(OR_2016$X11.04.2014=="YES",1,0)
OR_2016$voteyes_2012<-ifelse(OR_2016$X11.06.2012=="YES",1,0)

#Date Markers
OR_2016$reg_day_of_year<-yday(OR_2016$lub_regdate)
OR_2016$birth_year<-year(OR_2016$lub_birthdate)
OR_2016$birth_day_of_year<-yday(OR_2016$lub_birthdate)


# Tag new registrants by proxy
OR_2016 <-OR_2016 %>%
  mutate(new16=ifelse(reg_year==2016 & X11.04.2014!="NO" & X11.04.2014!="YES", 1,0))

# Create age groups
OR_2016<-OR_2016 %>%
  mutate(age1823=ifelse(age_at_2016election>=18 & age_at_2016election<=23, 1, 0),
         age2430=ifelse(age_at_2016election>=24 & age_at_2016election<=30, 1, 0),
         age3140=ifelse(age_at_2016election>=31 & age_at_2016election<=40, 1, 0),
         age4160=ifelse(age_at_2016election>=41 & age_at_2016election<=60, 1, 0),
         age61plus=ifelse(age_at_2016election>=61, 1, 0))

#Compute difference between registration and birth dates. 
OR_2016<-OR_2016 %>% mutate(doydifference365=case_when((reg_day_of_year-birth_day_of_year)<(-183) ~ reg_day_of_year-birth_day_of_year+366, 
                                              (reg_day_of_year-birth_day_of_year)>183 ~ (reg_day_of_year-birth_day_of_year)-366,
                                              TRUE~(reg_day_of_year-birth_day_of_year) ))
OR_2016<-OR_2016 %>% mutate(doydifference365=ifelse(doydifference365==183,-183, doydifference365))


# Create Dummies for party status
OR_2016$democrat<-ifelse(OR_2016$PARTY_CODE.x=="DEM",1,0)
OR_2016$republican<-ifelse(OR_2016$PARTY_CODE.x=="REP",1,0)
OR_2016<-OR_2016 %>% mutate(third_party=case_when(PARTY_CODE.x=="REP" ~ 0, 
                                         PARTY_CODE.x=="DEM" ~ 0,
                                         PARTY_CODE.x=="NAV" ~ 0,
                                        TRUE~1))

#Create instrument (Pre Sept 27 birthday) 
OR_2016 <- OR_2016  %>%
  mutate(preSept27bday = case_when(birth_day_of_year>bdaysept272016 & leap_year(lub_birthdate)==TRUE ~ 0, 
                                   birth_day_of_year+1>bdaysept272016 & leap_year(lub_birthdate)==FALSE ~ 0, 
                                   TRUE ~ 1))

OR_2016 <- OR_2016  %>%
  mutate(bdaygroups = case_when(birth_day_of_year<bdaysept272016 & leap_year(lub_birthdate)==TRUE ~ 0,
                                birth_day_of_year+1<bdaysept272016 & leap_year(lub_birthdate)==FALSE ~ 0,
                                 birth_day_of_year>=bdaysept272016 & birth_day_of_year<=deadlineNov2016  & leap_year(lub_birthdate)==TRUE ~ 1, 
                                   birth_day_of_year+1>=bdaysept272016 & birth_day_of_year+1<=deadlineNov2016  & leap_year(lub_birthdate)==FALSE ~ 1, 
                                   birth_day_of_year>deadlineNov2016 &birth_day_of_year<=electionday2016 & leap_year(lub_birthdate)==TRUE ~ 2, 
                                   birth_day_of_year+1>deadlineNov2016&birth_day_of_year+1<=electionday2016 & leap_year(lub_birthdate)==FALSE ~ 2,
                                birth_day_of_year>electionday2016 & leap_year(lub_birthdate)==TRUE ~ 3, 
                                birth_day_of_year+1>electionday2016 & leap_year(lub_birthdate)==FALSE ~ 3))


# Creates Instrumented variable (registered by Oct 18 in year 2016). 
OR_2016$reg2016_preOct18<-ifelse(OR_2016$lub_regdate<=ymd(20161018) & OR_2016$reg_year==2016,1,0)


# Limit data to those with birthdays 42 days prior and 42 days subsequent to September 27th. 
# Coding accounts for leap years. 

OR_2016<-OR_2016 %>%
  mutate(samplewindow = case_when(birth_day_of_year<229 & leap_year(lub_birthdate)==TRUE ~ 0, 
                                  birth_day_of_year<228 & leap_year(lub_birthdate)==FALSE ~ 0, 
                                  birth_day_of_year>313 & leap_year(lub_birthdate)==TRUE ~ 0,
                                  birth_day_of_year>312 & leap_year(lub_birthdate)==FALSE ~ 0,
                              TRUE ~ 1))

# subset of data = registered in 2016 in sample window
sample_OR2016_reg2016<-OR_2016 %>%
  filter(reg_year==2016 & samplewindow==1)

# Create variable equivalent to regyear2016 but named for universal use
OR_2016$reg_duringAVR<-OR_2016$regyear2016

# Create AVR factor indicator
OR_2016<-OR_2016 %>% mutate(AVR=case_when(DESCRIPTION=="OMV Phase2" ~ NA_real_,
                                          DESCRIPTION=="Traditional"~0,
                                          DESCRIPTION=="OMV Phase 1"~1))


OR_2016<-OR_2016 %>% mutate(AVR_label=case_when(DESCRIPTION=="OMV Phase2" ~ "",
                                          DESCRIPTION=="Traditional"~ "Traditional",
                                          DESCRIPTION=="OMV Phase 1"~ "AVR"))

# Dataframe exclusive for those registered post 2016 (AVR Phase 1 in place)
reg_duringAVR<-OR_2016 %>%
  filter(reg_year==2016)

# Is registration withing 30 days of birthdate?
OR_2016$doydiffby30<-ifelse(OR_2016$doydifference365<=30 &OR_2016$doydifference365>=-30,1,0)


### FIGURE 1 (Oregon Component)
ggplot(subset(reg_duringAVR, AVR==1 | AVR==0), aes(doydifference365))+
  geom_histogram(binwidth = 5, aes(y=..density..))+
  facet_grid(.~AVR_label)+ggtitle("2016 Oregon Registrants")+ 
  xlab("Difference between Birth Date and Registration Date")+ 
  geom_vline(xintercept = 0) + theme_minimal() +   
  scale_x_continuous(limits=c(-182,182), breaks=c(-182, -91, 0, 91, 182))+  
  scale_y_continuous(limits=c(0,0.015), breaks=c(0,0.005, 0.01))
 
ggsave("figures/figure_1_OR.png")

### TABLE 1 (Oregon Component) & Marginal Effects in text

logit_doydiffOR2016 <- glm(doydiffby30 ~ AVR, data =subset(OR_2016, reg_year==2016), family = "binomial")
summary(logit_doydiffOR2016, diagnostics=T)
effects_logit_doydiffOR2016 <- margins(logit_doydiffOR2016) 

saveRDS(logit_doydiffOR2016, file="Oregon/Object Files 2016/logit_doydiffOR2016.rds")


# Code to display modal date difference between birthday and registration for OMV Voters
OMV_Voters<-OR_2016 %>%
  filter(DESCRIPTION=="OMV Phase 1")  

Mode <- function(x) {
ux <- unique(x)
ux[which.max(tabulate(match(x, ux)))]
}

Mode(OMV_Voters$doydifference365)


#### Equivalence tests used to create Figure 2

# Tests for full calendar year

dem0<-reg_duringAVR$democrat[reg_duringAVR$preSept27bday==0]
dem1<-reg_duringAVR$democrat[reg_duringAVR$preSept27bday==1]
dem_equiv<-fisher.binom.two.sided(dem0, dem1, eps_sub=.01, alpha=0.05)
dem_equiv<-c(dem_equiv$odds.ratio, dem_equiv$equiv.CI[1], dem_equiv$equiv.CI[2], "Democrat")

rep0<-reg_duringAVR$republican[reg_duringAVR$preSept27bday==0]
rep1<-reg_duringAVR$republican[reg_duringAVR$preSept27bday==1]
rep_equiv<-fisher.binom.two.sided(rep0, rep1, eps_sub=.01, alpha=0.05)
rep_equiv<-c(rep_equiv$odds.ratio, rep_equiv$equiv.CI[1], rep_equiv$equiv.CI[2], "Republican")

third0<-reg_duringAVR$third_party[reg_duringAVR$preSept27bday==0]
third1<-reg_duringAVR$third_party[reg_duringAVR$preSept27bday==1]
third_equiv<-fisher.binom.two.sided(third0, third1, eps_sub=.01, alpha=0.05)
third_equiv<-c(third_equiv$odds.ratio, third_equiv$equiv.CI[1], third_equiv$equiv.CI[2], "Third Party")

white0<-reg_duringAVR$white[reg_duringAVR$preSept27bday==0]
white1<-reg_duringAVR$white[reg_duringAVR$preSept27bday==1]
white_equiv<-fisher.binom.two.sided(white0, white1, eps_sub=.01, alpha=0.05)
white_equiv<-c(white_equiv$odds.ratio, white_equiv$equiv.CI[1], white_equiv$equiv.CI[2], "White")

black0<-reg_duringAVR$black[reg_duringAVR$preSept27bday==0]
black1<-reg_duringAVR$black[reg_duringAVR$preSept27bday==1]
black_equiv<-fisher.binom.two.sided(black0, black1, eps_sub=.01, alpha=0.05)
black_equiv<-c(black_equiv$odds.ratio, black_equiv$equiv.CI[1], black_equiv$equiv.CI[2], "Black")

hispanic0<-reg_duringAVR$hispanic[reg_duringAVR$preSept27bday==0]
hispanic1<-reg_duringAVR$hispanic[reg_duringAVR$preSept27bday==1]
hispanic_equiv<-fisher.binom.two.sided(hispanic0, hispanic1, eps_sub=.01, alpha=0.05)
hispanic_equiv<-c(hispanic_equiv$odds.ratio, hispanic_equiv$equiv.CI[1], hispanic_equiv$equiv.CI[2], "Hispanic")

asian0<-reg_duringAVR$asian[reg_duringAVR$preSept27bday==0]
asian1<-reg_duringAVR$asian[reg_duringAVR$preSept27bday==1]
asian_equiv<-fisher.binom.two.sided(asian0, asian1, eps_sub=.01, alpha=0.05)
asian_equiv<-c(asian_equiv$odds.ratio, asian_equiv$equiv.CI[1], asian_equiv$equiv.CI[2], "Asian")

age18230<-reg_duringAVR$age1823[reg_duringAVR$preSept27bday==0]
age18231<-reg_duringAVR$age1823[reg_duringAVR$preSept27bday==1]
age1823_equiv<-fisher.binom.two.sided(age18230, age18231, eps_sub=.01, alpha=0.05)
age1823_equiv<-c(age1823_equiv$odds.ratio, age1823_equiv$equiv.CI[1], age1823_equiv$equiv.CI[2], "Age 18-23")

age24300<-reg_duringAVR$age2430[reg_duringAVR$preSept27bday==0]
age24301<-reg_duringAVR$age2430[reg_duringAVR$preSept27bday==1]
age2430_equiv<-fisher.binom.two.sided(age24300, age24301, eps_sub=.01, alpha=0.05)
age2430_equiv<-c(age2430_equiv$odds.ratio, age2430_equiv$equiv.CI[1], age2430_equiv$equiv.CI[2], "Age 24-30")

age31400<-reg_duringAVR$age3140[reg_duringAVR$preSept27bday==0]
age31401<-reg_duringAVR$age3140[reg_duringAVR$preSept27bday==1]
age3140_equiv<-fisher.binom.two.sided(age31400, age31401, eps_sub=.01, alpha=0.05)
age3140_equiv<-c(age3140_equiv$odds.ratio, age3140_equiv$equiv.CI[1], age3140_equiv$equiv.CI[2], "Age 31-40")

age41600<-reg_duringAVR$age4160[reg_duringAVR$preSept27bday==0]
age41601<-reg_duringAVR$age4160[reg_duringAVR$preSept27bday==1]
age4160_equiv<-fisher.binom.two.sided(age41600, age41601, eps_sub=.01, alpha=0.05)
age4160_equiv<-c(age4160_equiv$odds.ratio, age4160_equiv$equiv.CI[1], age4160_equiv$equiv.CI[2], "Age 41-60")

age61plus0<-reg_duringAVR$age61plus[reg_duringAVR$preSept27bday==0]
age61plus1<-reg_duringAVR$age61plus[reg_duringAVR$preSept27bday==1]
age61plus_equiv<-fisher.binom.two.sided(age61plus0, age61plus1, eps_sub=.01, alpha=0.05)
age61plus_equiv<-c(age61plus_equiv$odds.ratio, age61plus_equiv$equiv.CI[1], age61plus_equiv$equiv.CI[2], "Age 61+")

data_equiv<-rbind(third_equiv,  rep_equiv, dem_equiv, asian_equiv, black_equiv, hispanic_equiv, white_equiv,  age61plus_equiv, age4160_equiv,  age3140_equiv, age2430_equiv, age1823_equiv)
data_equiv<-data.frame(data_equiv)
data_equiv<-data_equiv %>%
  rename(estimate=X1, lower=X2, upper=X3, name=X4)

data_equiv$estimate<-as.numeric(data_equiv$estimate) 
data_equiv$lower<-as.numeric(data_equiv$lower) 
data_equiv$upper<-as.numeric(data_equiv$upper) 
data_equiv$year<-"OR 2016"
data_equiv$full<-"Full Implementation Period"

equivalence_tests<-data_equiv


# Sample window only tests 

dem0<-sample_OR2016_reg2016$democrat[sample_OR2016_reg2016$preSept27bday==0]
dem1<-sample_OR2016_reg2016$democrat[sample_OR2016_reg2016$preSept27bday==1]
dem_equiv<-fisher.binom.two.sided(dem0, dem1, eps_sub=.01, alpha=0.05)
dem_equiv<-c(dem_equiv$odds.ratio, dem_equiv$equiv.CI[1], dem_equiv$equiv.CI[2], "Democrat")

rep0<-sample_OR2016_reg2016$republican[sample_OR2016_reg2016$preSept27bday==0]
rep1<-sample_OR2016_reg2016$republican[sample_OR2016_reg2016$preSept27bday==1]
rep_equiv<-fisher.binom.two.sided(rep0, rep1, eps_sub=.01, alpha=0.05)
rep_equiv<-c(rep_equiv$odds.ratio, rep_equiv$equiv.CI[1], rep_equiv$equiv.CI[2], "Republican")

third0<-sample_OR2016_reg2016$third_party[sample_OR2016_reg2016$preSept27bday==0]
third1<-sample_OR2016_reg2016$third_party[sample_OR2016_reg2016$preSept27bday==1]
third_equiv<-fisher.binom.two.sided(third0, third1, eps_sub=.01, alpha=0.05)
third_equiv<-c(third_equiv$odds.ratio, third_equiv$equiv.CI[1], third_equiv$equiv.CI[2], "Third Party")

white0<-sample_OR2016_reg2016$white[sample_OR2016_reg2016$preSept27bday==0]
white1<-sample_OR2016_reg2016$white[sample_OR2016_reg2016$preSept27bday==1]
white_equiv<-fisher.binom.two.sided(white0, white1, eps_sub=.01, alpha=0.05)
white_equiv<-c(white_equiv$odds.ratio, white_equiv$equiv.CI[1], white_equiv$equiv.CI[2], "White")

black0<-sample_OR2016_reg2016$black[sample_OR2016_reg2016$preSept27bday==0]
black1<-sample_OR2016_reg2016$black[sample_OR2016_reg2016$preSept27bday==1]
black_equiv<-fisher.binom.two.sided(black0, black1, eps_sub=.01, alpha=0.05)
black_equiv<-c(black_equiv$odds.ratio, black_equiv$equiv.CI[1], black_equiv$equiv.CI[2], "Black")

hispanic0<-sample_OR2016_reg2016$hispanic[sample_OR2016_reg2016$preSept27bday==0]
hispanic1<-sample_OR2016_reg2016$hispanic[sample_OR2016_reg2016$preSept27bday==1]
hispanic_equiv<-fisher.binom.two.sided(hispanic0, hispanic1, eps_sub=.01, alpha=0.05)
hispanic_equiv<-c(hispanic_equiv$odds.ratio, hispanic_equiv$equiv.CI[1], hispanic_equiv$equiv.CI[2], "Hispanic")

asian0<-sample_OR2016_reg2016$asian[sample_OR2016_reg2016$preSept27bday==0]
asian1<-sample_OR2016_reg2016$asian[sample_OR2016_reg2016$preSept27bday==1]
asian_equiv<-fisher.binom.two.sided(asian0, asian1, eps_sub=.01, alpha=0.05)
asian_equiv<-c(asian_equiv$odds.ratio, asian_equiv$equiv.CI[1], asian_equiv$equiv.CI[2], "Asian")

age18230<-sample_OR2016_reg2016$age1823[sample_OR2016_reg2016$preSept27bday==0]
age18231<-sample_OR2016_reg2016$age1823[sample_OR2016_reg2016$preSept27bday==1]
age1823_equiv<-fisher.binom.two.sided(age18230, age18231, eps_sub=.01, alpha=0.05)
age1823_equiv<-c(age1823_equiv$odds.ratio, age1823_equiv$equiv.CI[1], age1823_equiv$equiv.CI[2], "Age 18-23")


age24300<-sample_OR2016_reg2016$age2430[sample_OR2016_reg2016$preSept27bday==0]
age24301<-sample_OR2016_reg2016$age2430[sample_OR2016_reg2016$preSept27bday==1]
age2430_equiv<-fisher.binom.two.sided(age24300, age24301, eps_sub=.01, alpha=0.05)
age2430_equiv<-c(age2430_equiv$odds.ratio, age2430_equiv$equiv.CI[1], age2430_equiv$equiv.CI[2], "Age 24-30")

age31400<-sample_OR2016_reg2016$age3140[sample_OR2016_reg2016$preSept27bday==0]
age31401<-sample_OR2016_reg2016$age3140[sample_OR2016_reg2016$preSept27bday==1]
age3140_equiv<-fisher.binom.two.sided(age31400, age31401, eps_sub=.01, alpha=0.05)
age3140_equiv<-c(age3140_equiv$odds.ratio, age3140_equiv$equiv.CI[1], age3140_equiv$equiv.CI[2], "Age 31-40")

age41600<-sample_OR2016_reg2016$age4160[sample_OR2016_reg2016$preSept27bday==0]
age41601<-sample_OR2016_reg2016$age4160[sample_OR2016_reg2016$preSept27bday==1]
age4160_equiv<-fisher.binom.two.sided(age41600, age41601, eps_sub=.01, alpha=0.05)
age4160_equiv<-c(age4160_equiv$odds.ratio, age4160_equiv$equiv.CI[1], age4160_equiv$equiv.CI[2], "Age 41-60")

age61plus0<-sample_OR2016_reg2016$age61plus[sample_OR2016_reg2016$preSept27bday==0]
age61plus1<-sample_OR2016_reg2016$age61plus[sample_OR2016_reg2016$preSept27bday==1]
age61plus_equiv<-fisher.binom.two.sided(age61plus0, age61plus1, eps_sub=.01, alpha=0.05)
age61plus_equiv<-c(age61plus_equiv$odds.ratio, age61plus_equiv$equiv.CI[1], age61plus_equiv$equiv.CI[2], "Age 61+")

data_equiv<-rbind(third_equiv,  rep_equiv, dem_equiv, asian_equiv, black_equiv, hispanic_equiv, white_equiv,  age61plus_equiv, age4160_equiv,  age3140_equiv, age2430_equiv, age1823_equiv)
data_equiv<-data.frame(data_equiv)
data_equiv<-data_equiv %>%
  rename(estimate=X1, lower=X2, upper=X3, name=X4)

data_equiv$estimate<-as.numeric(data_equiv$estimate) 
data_equiv$lower<-as.numeric(data_equiv$lower) 
data_equiv$upper<-as.numeric(data_equiv$upper) 
data_equiv$year<-"OR 2016"
data_equiv$full<-"Birthday Window"

equivalence_tests<-rbind(equivalence_tests, data_equiv)

equivalence_tests$name <- factor(equivalence_tests$name, levels=unique(equivalence_tests$name))

saveRDS(equivalence_tests, file="Oregon/Object Files 2016/OR16_equiv.rds")


### Test for differential registration bias


### Differential registration bias test
votersbybirthdate_sample<- sample_OR2016_reg2016 %>%
  group_by(birth_day_of_year) %>%
  summarise(sumvoters=n(), Sept27=max(preSept27bday))

##this is a copy of US_births_1994-2003_CDC_NCHS.csv from 538’s Github
births <- read_csv(file="https://raw.githubusercontent.com/fivethirtyeight/data/master/births/US_births_1994-2003_CDC_NCHS.csv")

##sum of births by birthdate
birthsbyyear<-summaryBy(births ~ month + date_of_month, data=births, FUN=sum)

birthsbyyear<-birthsbyyear %>%
  mutate(birth_day_of_year = dplyr::row_number())

votersbybirthdate_sample<-left_join(votersbybirthdate_sample, birthsbyyear, by="birth_day_of_year")
votersbybirthdate_sample$prop_voters<-votersbybirthdate_sample$sumvoters/votersbybirthdate_sample$births.sum
reg_balance_sample<-t.test(prop_voters~Sept27, data=votersbybirthdate_sample)
reg_balance_sample

votersbybirthdate_full<- reg_duringAVR %>%
  group_by(birth_day_of_year) %>%
  summarise(sumvoters=n(), Sept27=max(preSept27bday))

votersbybirthdate_full<-left_join(votersbybirthdate_full, birthsbyyear, by="birth_day_of_year")
votersbybirthdate_full$prop_voters<-votersbybirthdate_full$sumvoters/votersbybirthdate_full$births.sum
reg_balance_full<-t.test(prop_voters~Sept27, data=votersbybirthdate_full)
reg_balance_full


### Table 3 (Oregon 2016 Component)

# 1 = Wald Estimation for subset registered during AVR in birthday window
table(sample_OR2016_reg2016$preSept27bday)
prop1_1<-prop.test(reg2016_preOct18~preSept27bday, data=sample_OR2016_reg2016, success=1)
firststage1<-prop1_1$estimate[2] - prop1_1$estimate[1]
prop2_1<-prop.test(voteyes_2016~preSept27bday, data=sample_OR2016_reg2016, success=1)
effect1<-prop2_1$estimate[2] - prop2_1$estimate[1]
wald1<-effect1/firststage1
prop1_1
firststage1
prop2_1
effect1
wald1

iv1_wald<-ivreg(voteyes_2016 ~ reg2016_preOct18  | preSept27bday, data=sample_OR2016_reg2016)
summary(iv1_wald, diagnostics = TRUE)


### Table 4

# Placebo test for year 2012 with Sept 27 birthday window
deadlineNov2012<-yday(ymd(20121016))
OR_2016$reg2012_preOct18<-ifelse(OR_2016$reg_day_of_year<=deadlineNov2012 & OR_2016$reg_year==2012,1,0)


prop1_placebo<-prop.test(reg2012_preOct18~preSept27bday, data=subset(OR_2016, reg_year==2012 & samplewindow==1 & age_at_2016election>=22), success=1)
firststage_placebo<-prop1_placebo$estimate[2] - prop1_placebo$estimate[1]
prop2_placebo<-prop.test(voteyes_2012~preSept27bday, data=subset(OR_2016, reg_year==2012 & samplewindow==1 & age_at_2016election>=22), success=1)
effect_placebo<-prop2_placebo$estimate[2] - prop2_placebo$estimate[1]
wald1_placebo<-effect_placebo/firststage_placebo

prop1_placebo
firststage_placebo
prop2_placebo
effect_placebo
wald1_placebo

iv_placebo1<-ivreg(voteyes_2012 ~ reg2012_preOct18  | preSept27bday, data=subset(OR_2016, reg_year==2012 & samplewindow==1 & age_at_2016election>=22))
summary(iv_placebo1, diagnostics = TRUE)


OR_2016 %>%
  filter(reg_year==2012 & samplewindow==1 & age_at_2016election>=22) %>%
  count(preSept27bday)

# Placebo test for year 2012 with Oct 16 birthday window



OR_2016$reg2012_preOct16<-ifelse(OR_2016$reg_day_of_year<=deadlineNov2012 & OR_2016$reg_year==2012,1,0)

OR_2016 <- OR_2016  %>%
  mutate(preOct16bday = case_when(birth_day_of_year>deadlineNov2012 & leap_year(lub_birthdate)==TRUE ~ 0, 
                                  birth_day_of_year+1>deadlineNov2012 & leap_year(lub_birthdate)==FALSE ~ 0, 
                                  TRUE ~ 1))

OR_2016<-OR_2016 %>%
  mutate(samplewindow2 = case_when(birth_day_of_year<269 & leap_year(lub_birthdate)==TRUE ~ 0, 
                                   birth_day_of_year<268 & leap_year(lub_birthdate)==FALSE ~ 0, 
                                   birth_day_of_year>311 & leap_year(lub_birthdate)==TRUE ~ 0,
                                   birth_day_of_year>310 & leap_year(lub_birthdate)==FALSE ~ 0,
                                   TRUE ~ 1))

OR_2016 %>%
  filter(reg_year==2012 & samplewindow2==1 & age_at_2016election>=22) %>%
  count(preOct16bday)

prop1_placebo2<-prop.test(reg2012_preOct16~preOct16bday, data=subset(OR_2016, reg_year==2012 & samplewindow2==1 & age_at_2016election>=22), success=1)
firststage_placebo2<-prop1_placebo2$estimate[2] - prop1_placebo2$estimate[1]
prop2_placebo2<-prop.test(voteyes_2012~preOct16bday, data=subset(OR_2016, reg_year==2012 & samplewindow2==1 & age_at_2016election>=22), success=1)
effect_placebo2<-prop2_placebo2$estimate[2] - prop2_placebo2$estimate[1]
wald1_placebo2<-effect_placebo2/firststage_placebo2

prop1_placebo2
firststage_placebo2
prop2_placebo2
effect_placebo2
wald1_placebo2


OR_2016 %>%
  filter(reg_year==2012 & samplewindow2==1 & age_at_2016election>=22) %>%
  group_by(preOct16bday) %>%
  summarise(n=n())

iv_placebo2<-ivreg(voteyes_2012 ~ reg2012_preOct16  | preOct16bday, data=subset(OR_2016, reg_year==2012 & samplewindow2==1 & age_at_2016election>=22))

summary(iv_placebo2, diagnostics = TRUE)

# TABLE 5 Instrumental variable regressions 

iv1OR16<-ivreg(voteyes_2016 ~ reg2016_preOct18 + populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2 | preSept27bday+populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2, data=sample_OR2016_reg2016)

ivnewOR16<-ivreg(voteyes_2016 ~ reg2016_preOct18 + populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2 | preSept27bday+populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2, data=subset(sample_OR2016_reg2016, new16==1))  

iv1OR16.sum<- summary(iv1OR16, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
ivnewOR16.sum<- summary(ivnewOR16, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)

iv1OR16.sum
ivnewOR16.sum

iv1OR16.rob<- coeftest(iv1OR16, function(x) vcovHC(x, type="HC0"))
ivnewOR16.rob<- coeftest(ivnewOR16, function(x) vcovHC(x, type="HC0"))

save(iv1OR16, ivnewOR16, iv1OR16.rob, ivnewOR16.rob, ivnewOR16.sum, iv1OR16.sum, file = "Oregon/Object Files 2016/OR 16 iv main robust.RData")


# TABLE 6 Subgroup Regressions

iv18_23<-ivreg(voteyes_2016 ~ reg2016_preOct18 + populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2 | preSept27bday+populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2, data=subset(sample_OR2016_reg2016, age_at_2016election>=18 & age_at_2016election<=23 & new16==1))  
iv24_30<-ivreg(voteyes_2016 ~ reg2016_preOct18 + populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2 | preSept27bday+populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2, data=subset(sample_OR2016_reg2016, age_at_2016election>=24 & age_at_2016election<=30 & new16==1))  
iv31_40<-ivreg(voteyes_2016 ~ reg2016_preOct18 + populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2 | preSept27bday+populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2, data=subset(sample_OR2016_reg2016, age_at_2016election>=31 & age_at_2016election<=40 & new16==1))  
iv41_60<-ivreg(voteyes_2016 ~ reg2016_preOct18 + populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2 | preSept27bday+populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2, data=subset(sample_OR2016_reg2016, age_at_2016election>=41 & age_at_2016election<=60 & new16==1)) 
iv61plus<-ivreg(voteyes_2016 ~ reg2016_preOct18 + populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2 | preSept27bday+populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2,data=subset(sample_OR2016_reg2016,  age_at_2016election>=61 & new16==1))  

ivfemale<-ivreg(voteyes_2016 ~ reg2016_preOct18 + populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2 | preSept27bday+populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2, data=subset(sample_OR2016_reg2016, gender_female==1 & new16==1))
ivmale<-ivreg(voteyes_2016 ~ reg2016_preOct18 + populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2 | preSept27bday+populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2, data=subset(sample_OR2016_reg2016, gender_female==0 & new16==1)) 

ivpop<-ivreg(voteyes_2016 ~ reg2016_preOct18 + populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2 | preSept27bday+populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2, data=subset(sample_OR2016_reg2016, populous_county==1 & new16==1))  
ivnonpop<-ivreg(voteyes_2016 ~ reg2016_preOct18 + populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2 | preSept27bday+populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2, data=subset(sample_OR2016_reg2016, populous_county==0 & new16==1))  

ivwhite<-ivreg(voteyes_2016 ~ reg2016_preOct18 + populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2 | preSept27bday+populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2, data=subset(sample_OR2016_reg2016, white==1 & new16==1))  
ivblack<-ivreg(voteyes_2016 ~ reg2016_preOct18 + populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2 | preSept27bday+populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2, data=subset(sample_OR2016_reg2016, black==1 & new16==1))  
ivhispanic<-ivreg(voteyes_2016 ~ reg2016_preOct18 + populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2 | preSept27bday+populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2, data=subset(sample_OR2016_reg2016, hispanic==1 & new16==1))  
ivasian<-ivreg(voteyes_2016 ~ reg2016_preOct18 + populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2 | preSept27bday+populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + gender3_1 + gender3_2, data=subset(sample_OR2016_reg2016, asian==1 & new16==1))  


iv18_23.sum<- summary( iv18_23, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
iv24_30.sum<- summary( iv24_30, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
iv31_40.sum<- summary( iv31_40, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
iv41_60.sum<- summary( iv41_60, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
iv61plus.sum<- summary( iv61plus, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
ivfemale.sum<- summary( ivfemale, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
ivmale.sum<- summary( ivmale, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
ivpop.sum<- summary( ivpop, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
ivnonpop.sum<- summary( ivnonpop, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
ivwhite.sum<- summary( ivwhite, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
ivblack.sum<- summary( ivblack, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
ivhispanic.sum<- summary( ivhispanic, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
ivasian.sum<- summary( ivasian, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)


iv18_23.rob<- coeftest( iv18_23, function(x) vcovHC(x, type="HC0"))
iv24_30.rob<- coeftest( iv24_30, function(x) vcovHC(x, type="HC0"))
iv31_40.rob<- coeftest( iv31_40, function(x) vcovHC(x, type="HC0"))
iv41_60.rob<- coeftest( iv41_60, function(x) vcovHC(x, type="HC0"))
iv61plus.rob<- coeftest( iv61plus, function(x) vcovHC(x, type="HC0"))
ivfemale.rob<- coeftest( ivfemale, function(x) vcovHC(x, type="HC0"))
ivmale.rob<- coeftest( ivmale, function(x) vcovHC(x, type="HC0"))
ivpop.rob<- coeftest( ivpop, function(x) vcovHC(x, type="HC0"))
ivnonpop.rob<- coeftest( ivnonpop, function(x) vcovHC(x, type="HC0"))
ivwhite.rob<- coeftest( ivwhite, function(x) vcovHC(x, type="HC0"))
ivblack.rob<- coeftest( ivblack, function(x) vcovHC(x, type="HC0"))
ivhispanic.rob<- coeftest( ivhispanic, function(x) vcovHC(x, type="HC0"))
ivasian.rob<- coeftest( ivasian, function(x) vcovHC(x, type="HC0"))


gaze.coeft <- function(x, col="Std. Error"){
  stopifnot(is.list(x))
  out <- lapply(x, function(y){
    y[ , col]
  })
  return(out)
}

gaze.lines.ivreg.diagn <- function(x, col="statistic", row=1:3, digits=1){
  stopifnot(is.list(x))
  out <- lapply(x, function(y){
    stopifnot(class(y)=="summary.ivreg")
    y$diagnostics[row, col, drop=FALSE]
  })
  out <- as.list(data.frame(t(as.data.frame(out)), check.names = FALSE))
  for(i in 1:length(out)){
    out[[i]] <- c(names(out)[i], round(out[[i]], digits=digits))
  }
  return(out)
}


stargazer(iv18_23,  iv24_30,  iv31_40,  iv41_60,  iv61plus,  ivfemale,  ivmale,  ivpop,  ivnonpop,  ivwhite,  ivblack,  ivhispanic,  ivasian, keep="reg2016_preOct18",
          dep.var.labels =NULL, column.labels= c("Aged 18-23", "Aged 24-30", "Aged 31-40", "Aged 41-60", "Aged 61 Plus", "Female", "Male", "Populous County", "Nonpopulous County", "White", "Black", "Hispanic", "Asian"),
          se = gaze.coeft(list(iv18_23.rob,  iv24_30.rob,  iv31_40.rob,  iv41_60.rob,  iv61plus.rob,  ivfemale.rob,  ivmale.rob,  ivpop.rob,  ivnonpop.rob,  ivwhite.rob,  ivblack.rob,  ivhispanic.rob,  ivasian.rob)), df=FALSE, omit.stat=c("adj.rsq","ser"),
          add.lines = gaze.lines.ivreg.diagn(list( iv18_23.sum,  iv24_30.sum,  iv31_40.sum,  iv41_60.sum,  iv61plus.sum,  ivfemale.sum,  ivmale.sum,  ivpop.sum,  ivnonpop.sum,  ivwhite.sum,  ivblack.sum,  ivhispanic.sum,  ivasian.sum), row=1), out="Oregon/Object Files 2016/OR16_subgroupiv.tex")



#### Table 7

# Existing registrants by birth year 
registered_count<-OR_2016 %>%
  select(birth_year) %>%
  group_by(birth_year) %>%
  mutate(registered=sum(n())) %>%
  distinct()

# OR Population by Birth year in 2016 (imputed estimates as of Jan 1)
population <- tibble::tribble(
  ~population, ~birth_year,
  NA,       2016L,
  46165.5,       2015L,
  46952.5,       2014L,
  47166,       2013L,
  47154,       2012L,
  47583.5,       2011L,
  48183,       2010L,
  48352.5,       2009L,
  49130.5,       2008L,
  49872,       2007L,
  49506,       2006L,
  48901.5,       2005L,
  48676.5,       2004L,
  48517,       2003L,
  47999,       2002L,
  48472,       2001L,
  49728.5,       2000L,
  50085.5,       1999L,
  49433,       1998L,
  48746.5,       1997L,
  49185.5,       1996L,
  50593.5,       1995L,
  52272,       1994L,
  54019,       1993L,
  55865,       1992L,
  58454,       1991L,
  59969.5,       1990L,
  58611,       1989L,
  56829.5,       1988L,
  56081.5,       1987L,
  56314.5,       1986L,
  57538.5,       1985L,
  57528,       1984L,
  57418,       1983L,
  58258,       1982L,
  58098.5,       1981L,
  58332.5,       1980L,
  57005.5,       1979L,
  54709.5,       1978L,
  53900,       1977L,
  52652,       1976L,
  52116,       1975L,
  51212.5,       1974L,
  49845.5,       1973L,
  50398,       1972L,
  52899.5,       1971L,
  54892,       1970L,
  53522,       1969L,
  50406.5,       1968L,
  48592.5,       1967L,
  48317,       1966L,
  49468,       1965L,
  50977.5,       1964L,
  51929,       1963L,
  52935.5,       1962L,
  53960.5,       1961L,
  55043,       1960L,
  54795.5,       1959L,
  54366,       1958L,
  55152,       1957L,
  55059.5,       1956L,
  54966,       1955L,
  55400,       1954L,
  55035.5,       1953L,
  53786,       1952L,
  51984.5,       1951L,
  50498,       1950L,
  49060.5,       1949L,
  47711.5,       1948L,
  48355,       1947L,
  42363.5,       1946L,
  35077.5,       1945L,
  34356,       1944L,
  33814.5,       1943L,
  31290.5,       1942L,
  27337,       1941L,
  25062.5,       1940L,
  23200,       1939L,
  21356,       1938L,
  19664,       1937L,
  18044.5,       1936L,
  16633.5,       1935L,
  15094.5,       1934L,
  13807,       1933L,
  13026,       1932L
)

# Calculate difference between registration and population in subset
population<-left_join(population, registered_count, by="birth_year")

population<-population %>%
  filter(birth_year>=1932 & birth_year<=1985)

population$unregistered<-population$population-population$registered
population$not_registered<-1

# Create dataset with row for each unregistered person
unregistered_expanded <-expandRows(population, "unregistered")
unregistered_expanded <-unregistered_expanded %>%
  select(-registered, -population)

# Limit data to relevant agre group and new registrants eligible to vote in Nov. General
OR_2016_full<-OR_2016 %>%
  filter(birth_year>=1932 & birth_year<=1985) %>%
  filter(lub_regdate<=mdy(10182016) | X11.08.2016!="-")

# Add unregistered individuals to data
OR_2016_full<-  bind_rows(OR_2016_full, unregistered_expanded)

# Create instrument
OR_2016_full$evenbirthyear<-ifelse(OR_2016_full$birth_year %% 2 == 0, 1, 0)

# Fill in known data for unregistered
OR_2016_full$DESCRIPTION <- ifelse(is.na(OR_2016_full$DESCRIPTION)==TRUE, "Not Registered", OR_2016_full$DESCRIPTION)
OR_2016_full$status<-OR_2016_full$DESCRIPTION 
OR_2016_full$status <- factor(OR_2016_full$status, levels = c(levels(OR_2016_full$status), "Not Registered"))
OR_2016_full$status[is.na(OR_2016_full$status)] <- "Not Registered"
OR_2016_full$new16[is.na(OR_2016_full$new16)] <- 0
OR_2016_full$voterhistory_byregyear[is.na(OR_2016_full$voterhistory_byregyear)] <- 0
OR_2016_full$voteyes_2016[is.na(OR_2016_full$voteyes_2016)] <- 0


# Limit to new registrants in 2016 and unregistered
OR_2016_full_new2016<-OR_2016_full %>%
  filter(reg_year==2016 | not_registered==1)  %>%
  filter(new16==1 | not_registered==1) %>%
  filter(birth_year >= 1932 & birth_year <= 1985) %>%
  filter(DESCRIPTION!="2" | not_registered==1)

# Create age variable
OR_2016_full_new2016<-OR_2016_full_new2016 %>%
  mutate(age=2016-birth_year)

# Dummy variable for 2016 registration by any means
OR_2016_full_new2016<-OR_2016_full_new2016 %>%
  mutate(any_reg_2016=ifelse(reg_year==2016, 1, 0)) 
OR_2016_full_new2016$any_reg_2016[is.na(OR_2016_full_new2016$any_reg_2016)] <- 0

ORiv_any_vote_new<-ivreg(voteyes_2016 ~  any_reg_2016 + age | evenbirthyear + age  , data=subset(OR_2016_full_new2016, new16==1 | not_registered==1))
summary(ORiv_any_vote_new, diagnostics = TRUE)
ORiv_any_vote_new.rob  <- coeftest(ORiv_any_vote_new, function(x) vcovHC(x, type="HC0"))
ORiv_any_vote_new.sum<- summary(ORiv_any_vote_new, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
ORiv_any_vote_new.sum

save(ORiv_any_vote_new,ORiv_any_vote_new.rob, ORiv_any_vote_new.sum, file = "Oregon/Object Files 2016/OR iv sensitivity.RData")

# Figure 2
ggplot_highlight2016<-OR_2016 %>%
  filter(reg_year==2016) %>%
  filter(birth_year<=1985 & birth_year>=1934, AVR==1) %>%
  filter(birth_year %% 2 == 0) %>%
  group_by(birth_year) %>%
  summarize(count=n()) 

# Figure Oregon portion 

OR_2016 %>%
  filter(reg_year==2016) %>%
  filter(birth_year>=1934, birth_year<1997, AVR==1) %>%
  group_by(birth_year) %>%
  summarize(count=n()) %>%
  ggplot(aes(x = birth_year, y=count)) + ylab("Count Registered by AVR ") + xlab("Birth Year") + ggtitle("Oregon 2016") + geom_point() + geom_line() +
  geom_point(data=ggplot_highlight2016, aes(x = birth_year, y=count), size=3, shape="square") +
  theme_minimal()

ggsave("figures/figure3_OR.png")

### Statistics presented in section entitled "Registration and Total Turnout Effect"


# Count of Number of AVR Voters Between May 1 and Nov 1
AVR_count<-OR_2016 %>%
  filter(DESCRIPTION=="OMV Phase 1") %>%
  filter(new16==1) %>%
  filter(lub_regdate>=mdy(05012016) & lub_regdate<mdy(11012016)) %>%
  summarize(n=n())

AVR_count<-as.numeric(AVR_count)

# How does birth year affect AVR vs Traditional Registration
OR_2016_full_new2016$AVR<-replace_na(OR_2016_full_new2016$AVR, 0)
OR_2016_full_new2016$traditional2016<-ifelse(OR_2016_full_new2016$DESCRIPTION=="3" , 1, 0)

prop1_1_avr<-prop.test(AVR~evenbirthyear, data=subset(OR_2016_full_new2016, birth_year>=1932 & birth_year<=1985, success=1))
firststage1_avr<-prop1_1_avr$estimate[2] - prop1_1_avr$estimate[1]
firststage1_avr
increase_avr<-(prop1_1_avr$estimate[1]-prop1_1_avr$estimate[2])/prop1_1_avr$estimate[2]
increase_avr
prop1_1_trad<-prop.test(traditional2016~evenbirthyear, data=subset(OR_2016_full_new2016, birth_year>=1932 & birth_year<=1985, success=1))
firststage1_trad<-prop1_1_trad$estimate[2] - prop1_1_trad$estimate[1]
firststage1_trad
increase_trad<-(prop1_1_trad$estimate[1]-prop1_1_trad$estimate[2])/prop1_1_trad$estimate[2]
increase_trad

firststage1_trad/firststage1_avr

would_be_trad<-as.numeric(firststage1_trad/firststage1_avr)*-1
would_be_trad

###### Using historical even-odd instrument to calculate percent counterfactual motor voter

OR_2016$evenbirthyear<-ifelse(OR_2016$birth_year %% 2 == 0, 1, 0)

OR_2016 <- OR_2016 %>%
  mutate(high_prob_reg=case_when(
    evenbirthyear==1 & reg_year%% 2 == 0  ~ 1, 
    evenbirthyear==0 & reg_year%% 2 == 1  ~ 1, 
    TRUE~0))

OR_2016<- OR_2016 %>%
  mutate(presidential_election=if_else(reg_year%%4==0, 1,0)) %>%
  mutate(midterm_election=if_else(reg_year%%4==2, 1, 0))

OR_2016$reg_duringAVR<-ifelse(OR_2016$reg_year==2016,1,0)

# Regression presented in Reviewer Material Appendix Table 1
logit_high_prob_reg<-glm(high_prob_reg ~ reg_duringAVR + midterm_election + presidential_election, data=subset(OR_2016, reg_year>2007 & birth_year >= 1932 & birth_year <= 1985 & voterhistory_byregyear==0), family=binomial(link='logit'))
summary(logit_high_prob_reg)

logitmargins<-margins(logit_high_prob_reg)
logitpredict<-prediction(logit_high_prob_reg, at= list(reg_duringAVR=0))
logitpredict2<-prediction(logit_high_prob_reg, at= list(reg_duringAVR=1))
mean(logitpredict$fitted, na.rm=TRUE)
mean(logitpredict2$fitted, na.rm=TRUE)
mean(logitpredict$fitted, na.rm=TRUE)- mean(logitpredict2$fitted, na.rm=TRUE)
# in absence of policy effect, probability of high probability birth year and
# registration year coinciding is 0.5
mean(logitpredict$fitted)-0.5
mean(logitpredict2$fitted)-0.5

would_be_motor<-(mean(logitpredict$fitted)-0.5)/(mean(logitpredict2$fitted)-0.5)
would_be_motor

### Calculating difference in voting rates with historical data and birthdate instrument


OR_2016<-OR_2016 %>%
  mutate(vote_yes201X=case_when(reg_year==2016 & voteyes_2016==1~ 1, 
                                reg_year==2016 & voteyes_2016==0~ 0,
                                reg_year==2014 & voteyes_2014==1~ 1, 
                                reg_year==2014 & voteyes_2014==0~ 0,
                                reg_year==2012 & voteyes_2012==1~ 1,
                                reg_year==2012 & voteyes_2012==0~ 0,
                                TRUE ~ NA_real_))

deadlineNov2014<-yday(ymd(20141014))
OR_2016$reg2014_preOct14<-ifelse(OR_2016$reg_day_of_year<=deadlineNov2014 & OR_2016$reg_year==2014,1,0)
OR_2016$reg2015_preProxy<-ifelse(OR_2016$reg_day_of_year<=cutoffNov2016 & OR_2016$reg_year==2015,1,0)
OR_2016$reg2013_preProxy<-ifelse(OR_2016$reg_day_of_year<=cutoffNov2016 & OR_2016$reg_year==2013,1,0)


OR_2016<-OR_2016 %>%
  mutate(reg201X_bydeadline=case_when(reg2014_preOct14==1 & reg_year==2014~1,
                                      reg2014_preOct14==0 & reg_year==2014~0,
                                      reg2012_preOct16==1 & reg_year==2012~1,
                                      reg2012_preOct16==0 & reg_year==2012~0,
                                      reg2016_preOct18==1 & reg_year==2016~1,
                                      reg2016_preOct18==0 & reg_year==2016~0,
                                      reg2015_preProxy==1 & reg_year==2015~1,
                                      reg2015_preProxy==0 & reg_year==2015~0, 
                                      reg2013_preProxy==1 & reg_year==2013~1,
                                      reg2013_preProxy==0 & reg_year==2013~0,
                                      
  ))


OR_2016<-OR_2016 %>%
  mutate(windowX=case_when(reg_year==2016 & (birth_day_of_year<=(cutoffNov2016+42) & birth_day_of_year>=(cutoffNov2016-42)) & leap_year(lub_birthdate)==TRUE ~ 1,
                           reg_year==2016 & (birth_day_of_year<=(cutoffNov2016+42-1) & birth_day_of_year>=(cutoffNov2016-42-1)) & leap_year(lub_birthdate)==FALSE ~ 1,
                           reg_year==2014 & (birth_day_of_year<=(deadlineNov2014+21) & birth_day_of_year>=(deadlineNov2014-63)) & leap_year(lub_birthdate)==TRUE ~ 1,
                           reg_year==2014 & (birth_day_of_year<=(deadlineNov2014+21-1) & birth_day_of_year>=(deadlineNov2014-63-1)) & leap_year(lub_birthdate)==FALSE ~ 1,
                           reg_year==2012 & (birth_day_of_year<=(deadlineNov2012+21) & birth_day_of_year>=(deadlineNov2012-63)) & leap_year(lub_birthdate)==TRUE ~ 1,
                           reg_year==2012 & (birth_day_of_year<=(deadlineNov2012+21-1) & birth_day_of_year>=(deadlineNov2012-63-1)) & leap_year(lub_birthdate)==FALSE ~ 1,
  ))


OR_2016<-OR_2016 %>%
  mutate(early_bdaygroupX=case_when(reg_year==2012 & birth_day_of_year>deadlineNov2012 & leap_year(lub_birthdate)==TRUE ~ 0, 
                                    reg_year==2012 & birth_day_of_year+1>deadlineNov2012 & leap_year(lub_birthdate)==FALSE ~ 0, 
                                    reg_year==2014 & birth_day_of_year>deadlineNov2014 & leap_year(lub_birthdate)==TRUE ~ 0, 
                                    reg_year==2014 & birth_day_of_year+1>deadlineNov2014 & leap_year(lub_birthdate)==FALSE ~ 0, 
                                    reg_year==2016 & birth_day_of_year>cutoffNov2016 & leap_year(lub_birthdate)==TRUE ~ 0, 
                                    reg_year==2016 & birth_day_of_year+1>cutoffNov2016 & leap_year(lub_birthdate)==FALSE ~ 0, 
                                    reg_year==2015 & birth_day_of_year>cutoffNov2016 & leap_year(lub_birthdate)==TRUE ~ 0, 
                                    reg_year==2015 & birth_day_of_year+1>cutoffNov2016 & leap_year(lub_birthdate)==FALSE ~ 0, 
                                    reg_year==2013 & birth_day_of_year>cutoffNov2016 & leap_year(lub_birthdate)==TRUE ~ 0, 
                                    reg_year==2013 & birth_day_of_year+1>cutoffNov2016 & leap_year(lub_birthdate)==FALSE ~ 0,
                                    TRUE ~ 1))

OR_2016<-OR_2016 %>%
  mutate(over18X=case_when(reg_year==2012 & age_at_2016election>=22 ~ 1,
                           reg_year==2014 & age_at_2016election>=20 ~ 1,
                           reg_year==2016 & age_at_2016election>=18 ~ 1))

OR_2016<-OR_2016 %>%
  mutate(post_AVR= case_when(reg_year==2012 ~ 0,
                             reg_year==2014  ~ 0,
                             reg_year==2016  ~ 1))

OR_2016$reg201X_bydeadline<-as.factor(OR_2016$reg201X_bydeadline)
OR_2016$early_bdaygroupX<-as.factor(OR_2016$early_bdaygroupX)
OR_2016$post_AVR<-as.factor(OR_2016$post_AVR)


### IV regression in Reviewer Material Appendix, Table 2
iv_diffdiff_new<-ivreg(vote_yes201X ~ reg201X_bydeadline*post_AVR + populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + presidential_election   | early_bdaygroupX*post_AVR + populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + presidential_election , data=subset(OR_2016, (reg_year==2012 | reg_year==2014 | reg_year==2016) & windowX==1 & over18X==1 & voterhistory_byregyear==0))

iv_diffdiff_new.rob  <- coeftest(iv_diffdiff_new, function(x) vcovHC(x, type="HC0"))
iv_diffdiff_new.sum<- summary(iv_diffdiff_new, vcov. = function(x) vcovHC(x, type="HC0"), diagnostics=T)
iv_diffdiff_new.sum

save(iv_diffdiff_new, iv_diffdiff_new.rob, iv_diffdiff_new.sum, file = "./Oregon/OR iv diffdiff new.RData")


# Calculate difference in voting rates (motor voter vs AVR)
iv_diffdiff_new<-ivreg(vote_yes201X ~ factor(reg201X_bydeadline)*factor(post_AVR) + populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + presidential_election  | early_bdaygroupX*factor(post_AVR) + populous_county+white+black+hispanic+asian+democrat+republican+third_party+age_at_2016election + presidential_election , data=subset(OR_2016, (reg_year==2012 | reg_year==2014 | reg_year==2016) & windowX==1 & over18X==1 & voterhistory_byregyear==0))
summary(iv_diffdiff_new)
diffdiff_predictAVR<-prediction(iv_diffdiff_new, at = list(post_AVR = 1, reg201X_bydeadline="1"))
diffdiff_predictmotor<-prediction(iv_diffdiff_new, at = list(post_AVR = 0, reg201X_bydeadline="1"))

motor_turnout_diff<-mean(diffdiff_predictmotor$fitted)-mean(diffdiff_predictAVR$fitted)
motor_turnout_diff


# final turnout calculation

only_reg_avr<-(AVR_count*(1-would_be_trad))*(1-would_be_motor)
only_reg_avr
turnout_rate<-as.numeric(ivnewOR16$coefficients[2])
turnout_rate
turnout_rate_less_motor<-(turnout_rate-(motor_turnout_diff+turnout_rate)*would_be_motor)/(1-would_be_motor)
turnout_rate_less_motor
AVR_voter_calc<-only_reg_avr*turnout_rate_less_motor
AVR_voter_calc
AVR_voter_calc_VEP<-AVR_voter_calc/VEP
AVR_voter_calc_VEP

sink()
